%run init_notebook.py
from src.utils import load_pd_df, get_dt_index
from src.processing import pd_join_dfs, pd_groupby, xcorr
from src.statsmodels import *
import scipy
import warnings
from statsmodels.tools.sm_exceptions import ValueWarning
warnings.simplefilter('ignore', ValueWarning)
from src.utils import load_pd_df, save_pd_df, get_dt_index, pd_join_freq, Capturing, cross_corr, pd_df_astype, save_fig, get_stars
from src.processing import pd_groupby, pd_join_dfs, adf_test_summary, hausman, plt_stacked_bar, get_individual_perc_error
from src.pymc_modelling import get_samp
import statsmodels.api as sm
from statsmodels.tsa.vector_ar.var_model import VAR
from statsmodels.tsa.ar_model import ar_select_order
from statsmodels.tsa.vector_ar.vecm import select_order
from statsmodels.tsa.vector_ar.vecm import VECM, select_coint_rank, select_order
from statsmodels.tsa.stattools import adfuller
from statsmodels.regression.linear_model import OLS
df = load_pd_df("df_analysis.feather", DATA_DIR)
sub1 = pd_groupby(df.set_index('date_recorded').sort_index(), ['pi_de_Y', 'pi_perc_MY',], 'M', 'last')
sub1 = sub1.dropna()
sub1 = sm.add_constant(sub1)
mod1 = OLS(sub1.pi_de_Y, sub1.drop('pi_de_Y', axis=1)).fit()
sub2 = pd_groupby(df.set_index('date_forecast').sort_index(), ['pi_de_Y', 'pi_exp_MY',], 'M', 'last')
sub2 = sub2.dropna()
sub2 = sm.add_constant(sub2)
mod2 = OLS(sub2.pi_de_Y, sub2.drop('pi_de_Y', axis=1)).fit()
sub3 = pd_groupby(df.set_index('date_forecast').sort_index(), ['pi_de_Y_diff', 'delta_pe_MY',], 'M', 'last')
sub3 = sub3.dropna()
sub3 = sm.add_constant(sub3)
mod3 = OLS(sub3.pi_de_Y_diff, sub3.drop('pi_de_Y_diff', axis=1)).fit()
out = get_statsmodels_summary([mod1, mod2, mod3], cols_out=['print', 'conf_lower', 'conf_upper']).round(3)
save_pd_df(out, "tab_rationality_test.csv", GRAPHS_DIR)
out
| conf_lower | conf_upper | ||||||||
|---|---|---|---|---|---|---|---|---|---|
| pi_de_Y | pi_de_Y_1 | pi_de_Y_diff | pi_de_Y | pi_de_Y_1 | pi_de_Y_diff | pi_de_Y | pi_de_Y_1 | pi_de_Y_diff | |
| const | -0.015 **\n[-2.556] | -0.044 ***\n[-7.723] | 0.023 ***\n[4.691] | -0.028 | -0.056 | 0.013 | -0.003 | -0.033 | 0.033 |
| delta_pe_MY | NaN | NaN | -1.238 **\n[-2.656] | NaN | NaN | -2.187 | NaN | NaN | -0.289 |
| pi_exp_MY | NaN | 1.811 ***\n[17.538] | NaN | NaN | 1.602 | NaN | NaN | 2.021 | NaN |
| pi_perc_MY | 1.334 ***\n[12.794] | NaN | NaN | 1.122 | NaN | NaN | 1.546 | NaN | NaN |
| N | 34.0 | 36.0 | 34.0 | 34.000 | 36.000 | 34.000 | 34.000 | 36.000 | 34.000 |
| R^2 | 0.836 | 0.9 | 0.181 | 0.836 | 0.900 | 0.181 | 0.836 | 0.900 | 0.181 |
| R^2 adj. | 0.831 | 0.898 | 0.155 | 0.831 | 0.898 | 0.155 | 0.831 | 0.898 | 0.155 |
import pymc as pm
import arviz as az
with pm.Model() as mod:
alpha = pm.Normal('alpha', 0, 10, shape=(1,))
beta = pm.Normal('beta', 0, 10, shape=(1,))
sigma = pm.HalfNormal('sigma', 2, shape=(1,))
mu = pm.Deterministic('mu', alpha + sub2.pi_exp_MY.values[:,None] @ beta)
pm.Normal('likelihood', mu=mu, sigma=sigma, observed=sub2.pi_de_Y)
idata = pm.sample(nuts_sampler='numpyro')
posterior = pm.sample_posterior_predictive(idata)
C:\Users\LukasGrahl\miniforge3\envs\mamba_env_memoire2\Lib\site-packages\pymc\sampling\mcmc.py:273: UserWarning: Use of external NUTS sampler is still experimental
warnings.warn("Use of external NUTS sampler is still experimental", UserWarning)
Compiling... Compilation time = 0:00:05.151223 Sampling...
0%| | 0/2000 [00:00<?, ?it/s]
0%| | 0/2000 [00:00<?, ?it/s]
0%| | 0/2000 [00:00<?, ?it/s]
0%| | 0/2000 [00:00<?, ?it/s]
Sampling time = 0:00:09.862072 Transforming variables... Transformation time = 0:00:00.265760
Sampling: [likelihood]
az.plot_trace(idata, var_names=['alpha', 'beta', 'sigma'])
plt.tight_layout()
;
''
sub = df.set_index('date_recorded')[
['delta_pe_MY_error_act_MY', 'pi_perc_MY_all_error_act_MY', 'pi_de_surprise_Y', 'pi_de_estimate_Y', 'pi_de_Y',]
].drop_duplicates()
sub2 = df.set_index('date_recorded')[
['delta_pe_MY_error_act_WY', 'pi_perc_MY_all_error_act_WY', 'T_sum_ind_op_diff']
].drop_duplicates()
_, _, _, fig = xcorr(sub2.pi_perc_MY_all_error_act_WY, dpi=300, n_lags=25, figsize=(9,4));
fig, ax = fig
save_fig(fig, 'emp_pi_perc_acorr.png')
_, _, _, fig = xcorr(sub2.delta_pe_MY_error_act_WY, dpi=300, n_lags=25, figsize=(9,4));
fig, ax = fig
save_fig(fig, 'emp_delta_pe_acorr.png')
_, _, _, fig = xcorr(sub.pi_de_estimate_Y, dpi=300, n_lags=35, figsize=(9,4));
fig, ax = fig
save_fig(fig, 'emp_pi_structural_acorr.png')
_, _, _, fig = xcorr(sub2.T_sum_ind_op_diff.dropna(), dpi=300, n_lags=35, figsize=(9,4));
fig, ax = fig
save_fig(fig, 'emp_T_sum_acorr.png')
df_sub = pd_groupby(df.set_index('date_recorded'),
['pi_de_surprise_Y', 'delta_pe_MY', 'pi_perc_MY_error_act',
'pi_de_surprise_Y_lead1', 'pi_de_estimate_Y_lead1', 'pi_perc_MY_S_error_act', 'i_exp_WY',
'pi_de_estimate_Y', 'pi_exp_MY', 'pi_perc_MY', 'T_sum_ind_op', 'T_sum_ind_op_diff', 'T_sum_raw']
, 'M', 'last')
df_sub = pd_join_freq(df_sub,
pd_groupby(df.set_index('date_recorded'), ['delta_pe_MY_error_act_MY', 'pi_perc_uncertainty_MY',], 'M', 'mean'),
'M',
)
df_sub['pi_perc_MY_error_act'] /= 100
df_sub['delta_pe_MY_error_act_MY'] /= 10
t_sum_var = 'T_sum_ind_op'
cols_endog, cols_exog = ['pi_perc_MY'], [t_sum_var, 'pi_de_surprise_Y', 'pi_de_estimate_Y', 'pi_exp_MY',]
sub = df_sub[cols_endog+cols_exog].dropna().copy()
for i in sub[cols_endog + cols_exog]:
print(i, f"ADF p-val: {adfuller(sub[i].dropna())[1]}")
jtest = select_coint_rank(sub[cols_endog + cols_exog], -1, 4) # signif=0.05)
print(jtest.rank)
var_order = select_order(sub[cols_endog + cols_exog].diff().dropna(), maxlags=4, deterministic='ci')
print(var_order.aic, var_order.bic)
vecm1 = VECM(sub[cols_endog + cols_exog].dropna(), coint_rank=jtest.rank, k_ar_diff=var_order.aic).fit()
save_fig(vecm1.irf().plot(figsize=(20,20)), 'fig_emp_macro_vecm_pi_perc.png');
pi_perc_MY ADF p-val: 0.01874377846322591 T_sum_ind_op ADF p-val: 0.906058472555874 pi_de_surprise_Y ADF p-val: 0.10964410772978911 pi_de_estimate_Y ADF p-val: 0.5890090275327853 pi_exp_MY ADF p-val: 0.14479079097337977 4 2 0
cols_endog, cols_exog = ['pi_perc_MY_error_act'], [t_sum_var, 'pi_de_surprise_Y', 'pi_de_estimate_Y', 'pi_exp_MY',]
sub = df_sub[cols_endog+cols_exog].dropna().copy()
for i in sub[cols_endog + cols_exog]:
print(i, f"ADF p-val: {adfuller(sub[i].dropna())[1]}")
jtest = select_coint_rank(sub[cols_endog + cols_exog], -1, 4) # signif=0.05)
print(jtest.rank)
var_order = select_order(sub[cols_endog + cols_exog].diff().dropna(), maxlags=4)
print(var_order.aic, var_order.bic)
vecm2 = VECM(sub[cols_endog + cols_exog].dropna(), coint_rank=jtest.rank, k_ar_diff=var_order.aic).fit()
save_fig(vecm2.irf().plot(figsize=(20,20)), 'fig_emp_macro_vecm_pi_perc_resid.png');
pi_perc_MY_error_act ADF p-val: 1.974540165527414e-08 T_sum_ind_op ADF p-val: 0.9283540570773853 pi_de_surprise_Y ADF p-val: 0.29697134223635535 pi_de_estimate_Y ADF p-val: 0.7775251260405094 pi_exp_MY ADF p-val: 0.2551156035612694 3 3 0
cols_endog, cols_exog = ['delta_pe_MY'], [t_sum_var, 'pi_de_surprise_Y', 'pi_de_estimate_Y', 'pi_exp_MY']
sub = df_sub[cols_endog+cols_exog].dropna().copy()
for i in sub[cols_endog + cols_exog]:
print(i, f"ADF p-val: {adfuller(sub[i].dropna())[1]}")
jtest = select_coint_rank(sub[cols_endog + cols_exog], -1, 4) # signif=0.05)
print(jtest.rank)
var_order = select_order(sub[cols_endog + cols_exog].diff().dropna(), maxlags=4)
print(var_order.aic, var_order.bic)
vecm3 = VECM(sub[cols_endog + cols_exog].dropna(), coint_rank=jtest.rank, k_ar_diff=var_order.aic).fit()
save_fig(vecm3.irf().plot(figsize=(20,20)), 'fig_emp_macro_vecm_delta_pe.png');
delta_pe_MY ADF p-val: 0.5619457365131322 T_sum_ind_op ADF p-val: 0.906058472555874 pi_de_surprise_Y ADF p-val: 0.10964410772978911 pi_de_estimate_Y ADF p-val: 0.5890090275327853 pi_exp_MY ADF p-val: 0.14479079097337977 5 3 1
cols_endog, cols_exog = ['delta_pe_MY_error_act_MY'], [t_sum_var, 'pi_de_surprise_Y', 'pi_de_estimate_Y', 'pi_exp_MY']
sub = df_sub[cols_endog+cols_exog].dropna().copy()
for i in sub[cols_endog + cols_exog]:
print(i, f"ADF p-val: {adfuller(sub[i].dropna())[1]}")
jtest = select_coint_rank(sub[cols_endog + cols_exog], -1, 4) # signif=0.05)
print(jtest.rank)
var_order = select_order(sub[cols_endog + cols_exog].diff().dropna(), maxlags=4)
print(var_order.aic, var_order.bic)
vecm4 = VECM(sub[cols_endog + cols_exog].dropna(), coint_rank=jtest.rank, k_ar_diff=var_order.aic).fit()
save_fig(vecm4.irf().plot(figsize=(20,20)), 'fig_emp_macro_vecm_delta_pe_resid.png');
delta_pe_MY_error_act_MY ADF p-val: 0.5446227785790462 T_sum_ind_op ADF p-val: 0.9283540570773853 pi_de_surprise_Y ADF p-val: 0.29697134223635535 pi_de_estimate_Y ADF p-val: 0.7775251260405094 pi_exp_MY ADF p-val: 0.2551156035612694 3 2 0
cols_endog, cols_exog = ['pi_perc_MY'], [t_sum_var, 'pi_de_surprise_Y_lead1', 'pi_de_estimate_Y_lead1', 'pi_exp_MY', ]
sub = df_sub[cols_endog+cols_exog].dropna().copy()
for i in sub[cols_endog + cols_exog]:
print(i, f"ADF p-val: {adfuller(sub[i].dropna())[1]}")
jtest = select_coint_rank(sub[cols_endog + cols_exog], -1, 4) # signif=0.05)
print(jtest.rank)
var_order = select_order(sub[cols_endog + cols_exog].diff().dropna(), maxlags=4)
print(var_order.aic, var_order.bic)
vecm5 = VECM(sub[cols_endog + cols_exog].dropna(), coint_rank=jtest.rank, k_ar_diff=var_order.aic).fit()
save_fig(vecm5.irf().plot(figsize=(20,20)), 'fig_emp_macro_vecm_pi_percS.png');
pi_perc_MY ADF p-val: 0.01874377846322591 T_sum_ind_op ADF p-val: 0.906058472555874 pi_de_surprise_Y_lead1 ADF p-val: 0.24946359680678376 pi_de_estimate_Y_lead1 ADF p-val: 0.7867507163884697 pi_exp_MY ADF p-val: 0.14479079097337977 4 3 3
cols_endog, cols_exog = ['pi_perc_MY_S_error_act'], [t_sum_var, 'pi_de_surprise_Y_lead1', 'pi_de_estimate_Y_lead1', 'pi_exp_MY']
sub = df_sub[cols_endog+cols_exog].dropna()
for i in sub[cols_endog + cols_exog]:
print(i, f"ADF p-val: {adfuller(sub[i].dropna())[1]}")
jtest = select_coint_rank(sub[cols_endog + cols_exog], -1, 4) # signif=0.05)
print(jtest.rank)
var_order = select_order(sub[cols_endog + cols_exog].diff().dropna(), maxlags=4)
print(var_order.aic, var_order.bic)
vecm6 = VECM(sub[cols_endog + cols_exog].dropna(), coint_rank=jtest.rank, k_ar_diff=var_order.aic).fit()
save_fig(vecm6.irf().plot(figsize=(20,20)), 'fig_emp_macro_vecm_pi_perc_residS.png');
pi_perc_MY_S_error_act ADF p-val: 5.314272487890989e-05 T_sum_ind_op ADF p-val: 0.9283540570773853 pi_de_surprise_Y_lead1 ADF p-val: 0.40575633837933295 pi_de_estimate_Y_lead1 ADF p-val: 0.8003606318626139 pi_exp_MY ADF p-val: 0.2551156035612694 4 2 0
lst_vecms = [vecm1, vecm2, vecm5, vecm6, vecm3, vecm4]
fig = get_multiple_vecm_irfs(lst_vecms,
dict_titles={'pi_de_surprise_Y': "$\pi^{surp}_t$",
"T_sum": "$T^{sum}_t$",
"delta_pe_MY_error_act_MY": '$\~{\pi}^{\Delta e}_{t|t+12}$',
"pi_perc_MY": "$\pi^p_t$", "delta_pe_MY": "$\pi^{\Delta e}_{t|t+12}$",
"pi_perc_MY_error_act": "$\~{\pi}^p_{t}$",
"pi_uncertainty_MY": "$\pi^{pu}_t$",
"pi_de_estimate_Y": "$\pi^{structural}_t$",
"pi_de_estimate_Y_lead1": "$\pi^{structural}_{t+1}$",
"pi_perc_MY_S_error_act": "$\~{\pi}^p_{t|t+1}$",
},
idx_vecm=(0, 3),
n_cols=2,
irf_periods=10,
figsize=(8,8),);
fig.suptitle("VECM IRF structural inflation")
fig.tight_layout()
save_fig(fig, "emp_vecm_error_news_pi_struct.png")
fig = get_multiple_vecm_irfs(lst_vecms,
dict_titles={'pi_de_surprise_Y': "$\pi^{surp}_t$",
"pi_de_surprise_Y_lead1": "$\pi^{surp}_{t+1}$",
"T_sum": "$T^{sum}_t$",
"delta_pe_MY_error_act_MY": '$\~{\pi}^{\Delta e}_{t|t+12}$',
"pi_perc_MY": "$\pi^p_t$", "delta_pe_MY": "$\pi^{\Delta e}_{t|t+12}$",
"pi_perc_MY_error_act": "$\~{\pi}^p_{t}$",
"pi_uncertainty_MY": "$\pi^{pu}_t$",
"pi_perc_MY_S_error_act": "$\~{\pi}^p_{t|t+1}$",
},
idx_vecm=(0,2),
n_cols=2,
irf_periods=10,
figsize=(8,8),);
fig.suptitle("VECM IRF surprise inflation")
fig.tight_layout()
save_fig(fig, "emp_vecm_error_news_pi_surp.png", GRAPHS_DIR)
fig = get_multiple_vecm_irfs(lst_vecms,
dict_titles={'pi_de_surprise_Y': "$\pi^{surp}_t$",
"T_sum_ind_op": "$N_t$",
"delta_pe_MY_error_act_MY": '$\~{\pi}^{\Delta e}_{t|t+12}$',
"pi_perc_MY": "$\pi^p_t$", "delta_pe_MY": "$\pi^{\Delta e}_{t|t+12}$",
"pi_perc_MY_error_act": "$\~{\pi}^p_{t}$",
"pi_uncertainty_MY": "$\pi^{pu}_t$",
"pi_perc_MY_S_error_act": "$\~{\pi}^p_{t|t+1}$",
},
n_cols=2,
irf_periods=10,
figsize=(8,8),);
fig.suptitle("VECM IRF news index")
fig.tight_layout()
save_fig(fig, "emp_vecm_error_news.png")
sub = df.groupby('week_recorded')[['pi_perc_WY', 'pi_exp_WY', 'T_sum_ind_op']].mean().diff()
fig, ax = get_fig_subplots()
ax.plot(sub.pi_perc_WY)
ax.plot(sub.pi_exp_WY)
ax.twinx().plot(sub.T_sum_ind_op, color='red')
fig.legend(['perc', 'delta', 'T'])
<matplotlib.legend.Legend at 0x239184b1650>
out = get_statsmodels_summary(lst_vecms, seperator=" ", is_filt_sig=True)
save_pd_df(out, 'tab_vecm_news_resid.csv', GRAPHS_DIR)
out
| delta_pe_MY | delta_pe_MY_error_act_MY | pi_perc_MY | pi_perc_MY_2 | pi_perc_MY_S_error_act | pi_perc_MY_error_act | |
|---|---|---|---|---|---|---|
| L2.pi_de_surprise_Y | 0.3892 *** [3.319] | -0.0089 [-0.188] | -0.013 [-0.486] | NaN | NaN | 0.1255 [0.575] |
| L3.pi_de_surprise_Y | 0.146 ** [2.391] | NaN | NaN | NaN | NaN | 0.0311 [0.206] |
| L3.pi_de_estimate_Y_lead1 | NaN | NaN | NaN | -0.1753 *** [-3.482] | NaN | NaN |
| L3.delta_pe_MY | -0.3204 ** [-2.427] | NaN | NaN | NaN | NaN | NaN |
| L3.T_sum_ind_op | -0.0194 *** [-4.768] | NaN | NaN | -0.008 *** [-3.812] | NaN | 0.0197 * [1.86] |
| L2.pi_perc_MY_error_act | NaN | NaN | NaN | NaN | NaN | 0.633 *** [2.808] |
| L2.pi_exp_MY | -1.083 *** [-3.724] | 0.181 [1.023] | -0.2573 ** [-2.14] | -0.3428 * [-1.674] | 37.3555 [0.744] | 1.4423 * [1.789] |
| L2.pi_de_surprise_Y_lead1 | NaN | NaN | NaN | -0.1995 *** [-3.475] | -14.2369 [-1.038] | NaN |
| L2.pi_de_estimate_Y_lead1 | NaN | NaN | NaN | -0.205 *** [-3.321] | -27.0656 [-1.483] | NaN |
| L3.pi_de_surprise_Y_lead1 | NaN | NaN | NaN | -0.1093 *** [-3.263] | NaN | NaN |
| L2.delta_pe_MY | 0.5196 *** [2.79] | NaN | NaN | NaN | NaN | NaN |
| L1.pi_perc_MY_error_act | NaN | NaN | NaN | NaN | NaN | 0.9641 *** [2.897] |
| L1.pi_exp_MY | 0.4722 * [1.775] | -0.1212 [-0.774] | 0.3004 *** [3.453] | 0.3556 *** [3.008] | -15.6152 [-0.348] | -2.0342 *** [-3.503] |
| L1.pi_de_surprise_Y_lead1 | NaN | NaN | NaN | -0.2628 *** [-3.209] | -9.3422 [-0.583] | NaN |
| L1.pi_de_surprise_Y | 0.3812 *** [3.047] | -0.0056 [-0.098] | 0.0049 [0.115] | NaN | NaN | 0.2325 [0.982] |
| L1.pi_de_estimate_Y | -0.5449 *** [-4.357] | 0.1414 ** [2.31] | -0.0013 [-0.029] | NaN | NaN | -0.6496 *** [-2.907] |
| L1.delta_pe_MY_error_act_MY | NaN | 0.5332 *** [2.785] | NaN | NaN | NaN | NaN |
| L1.delta_pe_MY | 1.2789 *** [8.246] | NaN | NaN | NaN | NaN | NaN |
| L1.T_sum_ind_op | -0.0303 *** [-6.536] | -0.0077 *** [-2.696] | -0.0028 [-1.538] | -0.0116 *** [-3.853] | 1.5312 ** [2.481] | 0.0261 *** [3.324] |
| L2.T_sum_ind_op | -0.0347 *** [-8.524] | -0.0048 [-1.541] | -0.0001 [-0.077] | -0.0085 *** [-2.855] | -1.8973 ** [-2.361] | -0.0061 [-0.528] |
| L3.pi_exp_MY | -1.9025 *** [-4.835] | NaN | NaN | -0.3677 ** [-2.125] | NaN | 0.4616 [0.784] |
| N lags | 4 | 3 | 3 | 4 | 3 | 4 |
| Coint. rank | 5 | 3 | 4 | 4 | 4 | 3 |
| Normality | True *** [18.307] | True [18.307] | True [18.307] | True [18.307] | True [18.307] | True [18.307] |
| N | 30 | 33 | 31 | 30 | 33 | 32 |
| Whiteness | False *** [179.581] | True [217.735] | False ** [212.304] | False *** [185.052] | True [212.304] | False *** [190.516] |